## This is R code for "The Demand for Insurance: Incorporating the Severity of
## Losing Office into the Insurance Model of Judicial Independence"
## Appendix: cross-validation

rm(list = ls())
pacman::p_load(
  tidyverse,this.path,cowplot,gridExtra
)
## set working directory
setwd(this.path::here(..=1))
## read data
load("Data/dat.RData")

## Preparation -----------------------------------------------------------------
## a. Select the threats variables
dat <- dat %>%
  mutate(
    threat = threat_cumsum,
    threatS = threatS_cumsum
  )

rmse <- function(obs,pred) sqrt(mean((obs-pred)^2)) #function to get RMSE

# Unfortunately a handful of countries only have a handful of years of observations, making partitioning into test and training difficult. 
# Burundi=1, Georiga=2, Guinea-Bissau=3, Kenya=4, Lebanon=4, Lesotho=4, Myanmar=1, Nigeria=3, Paraguay=3, Somalia=2, Uganda=4

cval <- extractdata(LJI ~ threatS  + 
             gdpS + demdurS +
             country_name + year,dat,na.rm=TRUE)
cval <- subset(cval,country_name!="Burundi")
cval <- subset(cval,country_name!="Burma/Myanmar")
cval <- subset(cval,country_name!="Republic of the Congo")

set.seed(1)
infitset <- sample(rownames(cval),size=dim(cval)[[1]]/2) 
#assign rows to a fit set (fit being the set we "train" our model on)
totalset <- rownames(cval) #make a total set
intestset <- setdiff(totalset,infitset) #differentiate
fitset <- cval[infitset,] #make the fit set
testset <- cval[intestset,] #make the test set (to test the coef on)
glm.fit <- glm(LJI ~ threatS  + 
		gdpS + demdurS +
		country_name, fitset,family="gaussian")
preds <- predict(glm.fit,testset)
oos.RMSE <- rmse(testset$LJI,preds)
#rmse(fitset$LJI,fitted(glm.fit))
fitset$preds <- predict(as.vector(glm.fit))
testset$preds <- preds
#rmse(fitset$LJI,fitted(glm.fit))

cross_val_figure1 <- ggplot(fitset,aes(preds,LJI)) + geom_point(alpha=I(1/4),size=2.5) + theme_bw() + scale_x_continuous('Predicted values',limits=c(0,1.1),breaks=c(0,0.5,1)) + scale_y_continuous('Actual values',limits=c(0,1.05),breaks=c(0,0.5,1)) + labs(title="In sample fit") +
theme(plot.title = element_text(size=20)) +  
theme(axis.title.y = element_text(size=14,angle=90),
	axis.text.y=element_text(size=14)) + theme(axis.title.x = element_text(size=14),
	 axis.text.x=element_text(size=14)) + annotate("text",label="RMSE = 0.064",x=0.2,y=1.02,size=5,colour="black") + geom_abline(intercept=0,slope=1,colour="black",size=2) + geom_abline(
		intercept=1.96*(rmse(fitset$LJI,fitted(glm.fit))),
		slope=1,colour="black",lty=2,size=1) + geom_abline(
		intercept=1.96*(-rmse(fitset$LJI,fitted(glm.fit))),
		slope=1,colour="black",lty=2,size=1)
cross_val_figure2 <- ggplot(testset,aes(preds,LJI)) + geom_point(alpha=I(1/4),size=2.5) + theme_bw() + scale_x_continuous('Predicted values',limits=c(0,1.1),breaks=c(0,0.5,1)) + scale_y_continuous('Actual values',limits=c(0,1.05),breaks=c(0, 0.5, 1)) + labs(title="Out of sample fit") +
theme(plot.title = element_text(size=20)) +  
theme(axis.title.y = element_text(size=14,angle=90),
	axis.text.y=element_text(size=14)) + theme(axis.title.x = element_text(size=14),
	 axis.text.x=element_text(size=14)) + annotate("text",label="RMSE = 0.071",x=0.2,y=1.02,size=5,colour="black") + geom_abline(intercept=0,slope=1,colour="black",size=2) + geom_abline(
		intercept=1.96*oos.RMSE,
		slope=1,colour="black",lty=2,size=1) + geom_abline(
		intercept=1.96*-oos.RMSE,
		slope=1,colour="black",lty=2,size=1)
plot_grid(cross_val_figure1, cross_val_figure2,ncol=2,nrow=1)
ggsave("./Figure/cross validation.pdf", width=8.5, height=4.25, units="in")
